##########################################
## "Voters get what they want"          ##
##########################################
## Heinrich, Kobayashi, Long            ##
##########################################

## Prep Lake/Nielsen data 
## Script 01
## June 29, 2017


## Prep Nielsen data
####################
## Read Lake's hierarchy data and the Nielsen replication data; selected
## columns of the former are merged into the latter below
lake <- read.dta("data/Lake_HIR_country_yearreplication.dta")
data <- read.dta("data/dat2.dta")
## The third column of Lake's data is renamed to the merge key used below
names(lake)[3] <- "countrynumcode_g"
## Keep only rows whose donorname is "United States"
data <- subset(data, donorname == "United States")
## Left join on year and country code: every row of `data` is kept, and the
## two hierarchy columns are NA where Lake's data has no matching row
data <- merge(
  x = data,
  y = lake[, c("year", "countrynumcode_g", "us_SH1995", "us_EH1995")],
  by = c("year", "countrynumcode_g"),
  all.x = TRUE
)
## Keep only rows with inmysample equal to 1
data <- subset(data, inmysample == 1)
## Interaction term: product of the ColdWar and socialist columns
data$ColdWarSocialist <- with(data, ColdWar * socialist)
## Copy llnnytimes under the name llnNYT_HR
data$llnNYT_HR <- data$llnnytimes

## Add Seguin/Gorman data
## Read the file, keep the three columns needed, and rename the country code
## and the article count to the names used in `data`
sg <- read.csv("data/GormanSeguin_replication.csv")
sg <- sg[, c("ccode", "year", "article_count_nyt")]
names(sg) <- c("countrynumcode_g", "year", "llnNYT_leader")
## Shift year forward by one, so a count recorded for year t is merged onto
## the row for year t + 1
sg$year <- 1 + sg$year
## Add 0.25 to every count (presumably so zero counts survive the log() below)
sg$llnNYT_leader <- 0.25 + sg$llnNYT_leader
## Collapse to one row per country-year: the mean of the group's maximum and
## the group's sum (identical when the group has a single non-missing value)
sg <- ddply(
  .data = sg,
  .variables = c("countrynumcode_g", "year"),
  .fun = function(grp) {
    counts <- grp$llnNYT_leader
    hi <- max(counts, na.rm = TRUE)
    total <- sum(counts, na.rm = TRUE)
    data.frame(llnNYT_leader = mean(c(hi, total)))
  }
)
data <- merge(data, sg, by = c("countrynumcode_g", "year"), all.x = TRUE)
## Show country-years whose merged value is infinite; max() over a group with
## no non-missing values returns -Inf, which propagates through mean()
subset(data, is.infinite(llnNYT_leader))[, c("countryname", "year")]
data$llnNYT_leader <- log(data$llnNYT_leader)
## NA errors only affect one observation (ccode=750, year=1996)

## Rescaling both NYT variables
## Each variable is shifted so that its smallest non-missing value becomes 0,
## then divided by its (pre-shift) maximum and multiplied by 10. Both variables
## must be treated identically: the shift subtracts the minimum (subtracting
## the maximum instead would make every value <= 0).
## NOTE(review): the denominator is the raw maximum rather than (max - min), so
## the top of the scale is 10 only if the minimum is 0 -- confirm this is the
## intended scaling before changing it, since it affects downstream estimates.
data$llnNYT_leader <- (data$llnNYT_leader - min(data$llnNYT_leader, na.rm = TRUE)) /
  max(data$llnNYT_leader, na.rm = TRUE) * 10
data$llnNYT_HR <- (data$llnNYT_HR - min(data$llnNYT_HR, na.rm = TRUE)) /
  max(data$llnNYT_HR, na.rm = TRUE) * 10

## Coding for MCMCglmm Tobit
## lnaidpc_l is a copy of lnaidpc in which exact zeros are replaced by -Inf;
## missing values stay missing
data$lnaidpc_l <- replace(data$lnaidpc, which(data$lnaidpc == 0), -Inf)

## Retain variables that we need eventually for the two hierarchy variables
## Security hierarchy sample: select the columns below (order is kept as
## listed) and drop every row that has a missing value in any of them
data_SH <- na.omit(data[, c(
  "countrynumcode_g", "year", "llnNYT_HR", "lphysint", "polity2",
  "llnaidpc", "lnworldaidtotal", "lln_rgdpc", "lln_population", "lln_trade",
  "lalliance", "dyad_colony", "socialist", "ColdWar", "lwar", "lratpercent",
  "ldonor_physint", "ldonorallyneighbor2", "ls3un", "llnreftotal",
  "us_SH1995", "ColdWarSocialist", "lnaidpc_l", "llneconaidpc", "lnaidpc",
  "llnNYT_leader"
)])
## Rescaling the hierarchy variables
## us_SH1995 is divided by its maximum within the complete-case sample
data_SH$us_SH1995 <- with(data_SH, us_SH1995 / max(us_SH1995))


## Economic hierarchy sample: select the columns below (order is kept as
## listed) and drop every row that has a missing value in any of them
data_EH <- na.omit(data[, c(
  "countrynumcode_g", "year", "llnNYT_HR", "lphysint", "polity2",
  "llnaidpc", "lnworldaidtotal", "lln_rgdpc", "lln_population", "lln_trade",
  "lalliance", "dyad_colony", "socialist", "ColdWar", "lwar", "lratpercent",
  "ldonor_physint", "ldonorallyneighbor2", "ls3un", "llnNYT_leader",
  "llnreftotal", "us_EH1995", "ColdWarSocialist", "lnaidpc_l",
  "llneconaidpc", "lnaidpc"
)])
## us_EH1995 is divided by its maximum within the complete-case sample
data_EH$us_EH1995 <- with(data_EH, us_EH1995 / max(us_EH1995))


## Some stats
#############
## Economic hierarchy
## 1,664 observations, 89 recipients
## Row count, number of distinct country codes, year span, and the sorted
## country names for the codes present in the sample
nrow(data_EH)
with(data_EH, length(unique(countrynumcode_g)))
range(data_EH[["year"]])
sort(countrycode(
  sourcevar = unique(data_EH$countrynumcode_g),
  origin = "cown",
  destination = "country.name"
))


## Security hierarchy 
## 2,025 observations, 110 countries
## Same summaries for the security-hierarchy sample (names before year span)
nrow(data_SH)
with(data_SH, length(unique(countrynumcode_g)))
sort(countrycode(
  sourcevar = unique(data_SH$countrynumcode_g),
  origin = "cown",
  destination = "country.name"
))
range(data_SH[["year"]])

## Write each prepared sample to its own .Rdata file
save(list = "data_EH", file = "output/data_EH.Rdata")
save(list = "data_SH", file = "output/data_SH.Rdata")
